## New names:
## New names:
## New names:
## • `` -> `...1`
pilot_game_data_clean <- clean_prolific_data(pilot_game_data)
## Joining with `by = join_by(trial_numeric, died)`
pilot_game_data_distance <- create_distance_prolific(pilot_game_data_clean)
write_csv(pilot_behave_data_clean, path(here(), "munge", "prolific", "cleaned_pilot_behavior.csv"))
write_csv(pilot_game_data_clean, path(here(), "munge", "prolific", "cleaned_pilot_game_data.csv"))
write_csv(pilot_game_data_distance, path(here(), "munge", "prolific", "cleaned_pilot_distance_data.csv"))
write_csv(pilot_all_vars_df, path(here(), "munge", "prolific", "cleaned_pilot_across_trial_data.csv"))
People seem to be very good at the game– every player had at least one block where they did not have a new minigame (no more than 3 deaths). People get better over time, especially over the first two blocks.
There are two reasonable choices for characterizing risk or avoidance during the trial. The first is the distance to the ghost when the player turned away from the ghost and the second is the minimum distance from the ghost on any given trial. I tend to think that the former is a slightly better choice since it is more closely tied to player choice, whereas the min distance can also be driven by the ghost turning around when the subject didn’t expect it.
Arg for excluding such trials: If the person turns after a ghost has initiated a chase or attack, they are turning because they realized they are in danger. It is no longer really about approach/avoid they are under an active threat.
Arg for including such trials: These are the trials where the person gets pretty close to the ghost. We are throwing out data with meaningful choices and reducing our variability by throwing them out.
Obviously, the main thing is that it depends on the question. This came up in the context of Jules analysis of last_away, but it is also relevant for the modeling of turn around points. When modeling turn around points, it would be best to keep the data but have a variable that tracks whether or not a ghost or chase has been initiated. But that will also take more careful thinking, because if you feed the model before a chase then the model will not know if there will be a chase. But it could soak up variance that is being attributed to reward or threat, so is probably important.
In regards to last_away, the question is what drives how close people are willing to get to the ghost. If the ghost has initiated an action, we would predict that would want to make people stay far away from the ghost, but in particular the rational choice is to leave immediately regardless of distance. So I am inclined to keep excluding the trials.
Interestingly, when I compare excluding/including the distance for some subjects the average distance stays similar and for some subjects it differs wildly. I try to explore why this is below.
clean_distance_bob_data <- pilot_game_data_distance %>%
filter(away_choice == last_away & away_choice != 0 & attack_chase_bob == 'Bob' & dots_eaten > 0) %>%
mutate(dist_ghost_nobob = distance_to_ghost) %>%
select(subject, trial_numeric, dist_ghost_nobob)
clean_distance_data <- pilot_game_data_distance %>%
filter(away_choice == last_away & away_choice != 0 & dots_eaten > 0)%>%
select(subject, trial_numeric, distance_to_ghost)
distance_compare <- full_join(clean_distance_data, clean_distance_bob_data)
## Joining with `by = join_by(subject, trial_numeric)`
subs <- sample(clean_distance_data$subject, 10)
distance_compare %>%
filter(subject %in% subs) %>%
pivot_longer(cols = c(distance_to_ghost, dist_ghost_nobob), values_to = "last_away", names_to = "type") %>%
ggplot(., aes(x = subject, y = last_away, fill = type )) +
geom_boxplot(notch = T) +
theme(panel.background = element_rect(fill = "white"))
tmp_33_bob <- pilot_game_data_distance %>%
filter(away_choice == last_away & away_choice != 0 & attack_chase_bob == 'Bob' & dots_eaten > 0) %>%
filter(subject== "Subject_33") %>%
mutate(rewardgroup = if_else(reward_groups > 2, "large", "small"))
tmp_33 <- pilot_game_data_distance %>%
filter(away_choice == last_away & away_choice != 0 & dots_eaten > 0) %>%
filter(subject== "Subject_33") %>%
mutate(rewardgroup = if_else(reward_groups > 2, "large", "small"))
tmp_14_bob <- pilot_game_data_distance %>%
filter(away_choice == last_away & away_choice != 0 & attack_chase_bob == 'Bob' & dots_eaten > 0) %>%
filter(subject== "Subject_14") %>%
mutate(rewardgroup = if_else(reward_groups > 2, "large", "small"))
tmp_14 <- pilot_game_data_distance %>%
filter(away_choice == last_away & away_choice != 0 & dots_eaten > 0) %>%
filter(subject== "Subject_14") %>%
mutate(rewardgroup = if_else(reward_groups > 2, "large", "small"))
tmp_29_bob <- pilot_game_data_distance %>%
filter(away_choice == last_away & away_choice != 0 & attack_chase_bob == 'Bob' & dots_eaten > 0) %>%
filter(subject== "Subject_29") %>%
mutate(rewardgroup = if_else(reward_groups > 2, "large", "small"))
tmp_29 <- pilot_game_data_distance %>%
filter(away_choice == last_away & away_choice != 0 & dots_eaten > 0) %>%
filter(subject== "Subject_29") %>%
mutate(rewardgroup = if_else(reward_groups > 2, "large", "small"))
tmp_42_bob <- pilot_game_data_distance %>%
filter(away_choice == last_away & away_choice != 0 & attack_chase_bob == 'Bob' & dots_eaten > 0) %>%
filter(subject== "Subject_42") %>%
mutate(rewardgroup = if_else(reward_groups > 2, "large", "small"))
tmp_42 <- pilot_game_data_distance %>%
filter(away_choice == last_away & away_choice != 0 & dots_eaten > 0) %>%
filter(subject== "Subject_42") %>%
mutate(rewardgroup = if_else(reward_groups > 2, "large", "small"))
summary(lm(last_away ~ rewardgroup, data = tmp_33_bob))
##
## Call:
## lm(formula = last_away ~ rewardgroup, data = tmp_33_bob)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.1429 -6.1429 -0.6825 5.5873 24.7778
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 48.143 1.057 45.526 <2e-16 ***
## rewardgroupsmall 1.079 1.485 0.727 0.469
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.848 on 140 degrees of freedom
## Multiple R-squared: 0.003759, Adjusted R-squared: -0.003357
## F-statistic: 0.5282 on 1 and 140 DF, p-value: 0.4686
summary(lm(last_away ~ rewardgroup, data = tmp_33))
##
## Call:
## lm(formula = last_away ~ rewardgroup, data = tmp_33)
##
## Residuals:
## Min 1Q Median 3Q Max
## -36.558 -6.478 -0.397 5.603 25.603
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 46.558 1.106 42.10 <2e-16 ***
## rewardgroupsmall 1.839 1.559 1.18 0.24
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.705 on 153 degrees of freedom
## Multiple R-squared: 0.009012, Adjusted R-squared: 0.002535
## F-statistic: 1.391 on 1 and 153 DF, p-value: 0.24
summary(lm(last_away ~ rewardgroup, data = tmp_14_bob))
##
## Call:
## lm(formula = last_away ~ rewardgroup, data = tmp_14_bob)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.9167 -5.3521 0.6479 5.0833 17.0833
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 40.9167 0.8264 49.510 <2e-16 ***
## rewardgroupsmall 2.4354 1.1729 2.077 0.0397 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.013 on 141 degrees of freedom
## Multiple R-squared: 0.02967, Adjusted R-squared: 0.02279
## F-statistic: 4.312 on 1 and 141 DF, p-value: 0.03966
summary(lm(last_away ~ rewardgroup, data = tmp_14))
##
## Call:
## lm(formula = last_away ~ rewardgroup, data = tmp_14)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.4167 -4.7000 -0.4167 5.3000 17.3000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 40.7000 0.8572 47.48 <2e-16 ***
## rewardgroupsmall 1.7167 1.2006 1.43 0.155
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.424 on 151 degrees of freedom
## Multiple R-squared: 0.01336, Adjusted R-squared: 0.006824
## F-statistic: 2.044 on 1 and 151 DF, p-value: 0.1548
summary(lm(last_away ~ rewardgroup, data = tmp_29_bob))
##
## Call:
## lm(formula = last_away ~ rewardgroup, data = tmp_29_bob)
##
## Residuals:
## Min 1Q Median 3Q Max
## -21.5357 -6.1839 0.4643 6.4643 19.6000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 35.536 1.114 31.890 <2e-16 ***
## rewardgroupsmall 2.864 1.622 1.765 0.0804 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.339 on 104 degrees of freedom
## Multiple R-squared: 0.02909, Adjusted R-squared: 0.01976
## F-statistic: 3.116 on 1 and 104 DF, p-value: 0.08044
summary(lm(last_away ~ rewardgroup, data = tmp_29))
##
## Call:
## lm(formula = last_away ~ rewardgroup, data = tmp_29)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.014 -7.014 -1.014 8.986 24.986
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 33.0138889 1.2128702 27.22 <2e-16 ***
## rewardgroupsmall -0.0001903 1.7093732 0.00 1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.29 on 143 degrees of freedom
## Multiple R-squared: 8.663e-11, Adjusted R-squared: -0.006993
## F-statistic: 1.239e-08 on 1 and 143 DF, p-value: 0.9999
summary(lm(last_away ~ rewardgroup, data = tmp_42_bob))
##
## Call:
## lm(formula = last_away ~ rewardgroup, data = tmp_42_bob)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.5116 -7.5116 -0.5116 8.4884 25.5800
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 38.512 1.576 24.436 <2e-16 ***
## rewardgroupsmall 3.908 2.149 1.818 0.0723 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.33 on 91 degrees of freedom
## Multiple R-squared: 0.03506, Adjusted R-squared: 0.02446
## F-statistic: 3.307 on 1 and 91 DF, p-value: 0.0723
summary(lm(last_away ~ rewardgroup, data = tmp_42))
##
## Call:
## lm(formula = last_away ~ rewardgroup, data = tmp_42)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.590 -7.769 -1.590 8.871 30.410
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 34.308 1.407 24.385 <2e-16 ***
## rewardgroupsmall 3.281 2.034 1.613 0.109
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.02 on 138 degrees of freedom
## Multiple R-squared: 0.01851, Adjusted R-squared: 0.0114
## F-statistic: 2.603 on 1 and 138 DF, p-value: 0.1089
# subject 33 stayed further back than other subjects
tmp_33_bob %>%
ggplot(aes(x = dots_eaten)) +
geom_bar() +
theme(panel.background = element_rect(fill = "white"))
# What made them go for the 4th reward? it doesn't seem to matter if the reward was large or not#
tmp_33_bob %>%
mutate(reward_4_large = if_else(reward_groups %in% c(2, 4), "large", "small")) %>%
ggplot(., aes(x = factor(dots_eaten), y = last_away, fill = reward_4_large)) +
geom_boxplot(notch = F) +
geom_point(aes(color = reward_4_large), color = "black", position = position_dodge(width = .8)) +
theme(panel.background = element_rect(fill = "white")) +
ggtitle("4 dot large") + labs(fill = "")
tmp_33_bob %>%
mutate(reward_4_large = if_else(reward_groups %in% c(2, 4), "large", "small")) %>%
ggplot(., aes(x = factor(dots_eaten), y = last_away, fill = rewardgroup)) +
geom_boxplot(notch = F) +
geom_point(aes(color = rewardgroup), color = "black", position = position_dodge(width = .8)) +
theme(panel.background = element_rect(fill = "white")) +
ggtitle("last two dots large") + labs(fill = "")
# Was it the direction of the ghost
tmp2_33_bob <- pilot_game_data_distance %>%
filter(attack_chase_bob == 'Bob' & dots_eaten >= 3) %>%
filter(subject== "Subject_33") %>%
filter(Eaten > 2) %>%
mutate(rewardgroup = if_else(reward_groups > 2, "large", "small")) %>%
group_by(trial_numeric) %>%
mutate(ghost_dir_at_dot_3 = first(ghost_direction)) %>%
select(trial_numeric, Eaten, Lives, ghost_dir_at_dot_3) %>%
distinct()
tmp2_33_bob %>%
ggplot(., aes(fill = ghost_dir_at_dot_3, x = factor(Eaten)))+
geom_bar(position= position_dodge2()) +
theme(panel.background = element_rect(fill = "white"))
tmp2_33_bob %>%
ggplot(., aes(fill = factor(Lives), x = factor(Eaten)))+
geom_bar(position= position_dodge2()) +
theme(panel.background = element_rect(fill = "white"))
# Found it! This subject would only get the 4th dot if they had full lives, they never lost a minigame!
pilot_game_data_distance %>%
filter(subject== "Subject_33") %>%
filter(trial_flip == 1) %>%
ggplot(., aes(fill = factor(Lives), x = factor(dots_eaten)))+
geom_bar(position= position_dodge2()) +
theme(panel.background = element_rect(fill = "white"))
## Adding missing grouping variables: `block`
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Adding missing grouping variables: `block`
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.